home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
cocktail
/
docme.lha
/
doc.me
/
scanex.me
< prev
next >
Wrap
Text File
|
1992-09-25
|
44KB
|
1,608 lines
.\" use: pic | tbl | eqn | ditroff -me
.\"
.\" "@(#)bibmac.me 2.2 9/9/83";
.\" IP -- forward the first two arguments to the -me request .ip.
.\" The reference macro s[ in this file calls .IP with a tag and an indent.
.de IP
.ip \\$1 \\$2
..
.\" LP -- alias for the -me request .lp; takes no arguments.
.\" Called by the [] macro of this file before the reference list.
.de LP
.lp
..
.\" @(#)bmac.std 2.2 9/9/83;
.\" standard format troff commands
.\" citation formatting strings
.\" Bracket pair put around a citation, the separator between several
.\" citations (comma plus narrow space) and the range hyphen.
.ds [[ [
.ds ]] ]
.ds ], ,\|
.ds ]- -
.\" Punctuation pairs: the opening string [x is a blank followed by \&,
.\" the closing string x] holds the punctuation character itself.
.ds [. " \&
.ds .] .
.ds [, " \&
.ds ,] ,
.ds [? " \&
.ds ?] ?
.ds [: " \&
.ds :] :
.ds [; " \&
.ds ;] ;
.ds [! " \&
.ds !] !
.ds [" " \&
.ds "] \&"
.ds [' " \&
.ds '] '
.\" [< is a blank plus \&; >] is defined as the empty string.
.ds [< " \&
.ds >]
.\" reference formatting strings
.\" (a leading double quote in a .ds value only protects leading blanks)
.ds a] " \&
.ds b] , \&
.ds c] , \&
.ds n] "\& and \&
.ds m] "\& and \&
.ds p] .
.\" reference formatting macros
.\" s[ -- open one reference entry: switch hyphenation off (.nh) and start
.\" an IP paragraph whose tag is the string [F in square brackets and whose
.\" second argument is 5m.
.de s[ \" start reference
.nh
.IP [\\*([F] 5m
..
.\" e[ -- close one reference entry by invoking the cleanup macro [-.
.de e[ \" end reference
.[-
..
.\" [] -- hook run before the collected references are printed; here it
.\" only issues .LP.
.de [] \" start to display collected references
.LP
..
.\" ][ -- pick the layout for the current reference.
.\" Number register t[ is set from which strings are non-empty, tested in
.\" this order: [J (with [V -> 1, without -> 5), [B -> 3, [R -> 4, [I -> 2,
.\" otherwise 0.  The last line then calls the macro whose name is that
.\" digit followed by "[", i.e. one of 0[ ... 5[.
.de ][ \" choose format
.ie !"\\*([J"" \{\
. ie !"\\*([V"" .nr t[ 1 \" journal
. el .nr t[ 5 \" conference paper
.\}
.el .ie !"\\*([B"" .nr t[ 3 \" article in book
.el .ie !"\\*([R"" .nr t[ 4 \" technical report
.el .ie !"\\*([I"" .nr t[ 2 \" book
.el .nr t[ 0 \" other
.\\n(t[[
..
.\" 0[ -- layout for a reference of no specific kind.
.\" Prints the non-empty ones of [A, [T, [V (prefixed "Vol."), [O and [D,
.\" separated by ", " and joined without gaps through \c, then a period.
.\" s[ opens the entry and e[ closes it.
.de 0[ \" other
.s[
.if !"\\*([A"" \\*([A\\c
.if !"\\*([T"" , \\*([T\\c
.if !"\\*([V"" , Vol. \\*([V\\c
.if !"\\*([O"" , \\*([O\\c
.if !"\\*([D"" , \\*([D\\c
\&.
.e[
..
.\" 1[ -- layout for a journal article.
.\" Prints [A and [T with trailing commas, then [J and [V together in
.\" italics, then the optional pieces ",[N", "([D)", ", [P" and ", [I",
.\" a period, and finally [O with its own period.
.\" s[ opens the entry and e[ closes it.
.de 1[ \" journal article
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J \\*([V\\fP\c
.if !"\\*([N"" ,\\*([N
.if !"\\*([D"" (\\*([D)\c
.if !"\\*([P"" , \\*([P\c
.if !"\\*([I"" , \\*([I\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 2[ -- layout for a book.
.\" Prints [A, or when [A is empty the string [E marked "eds." if number
.\" register [E exceeds 1 and "ed." otherwise; then [T in italics.
.\" The scratch string a[ is cleared and rebuilt from the non-empty ones
.\" of [I, [C and [D joined by ", ", and printed followed by a period.
.\" [G (labelled "Gov. ordering no.") and [O follow when set.
.\" s[ opens the entry and e[ closes it.
.de 2[ \" book
.s[
.ie !"\\*([A"" \\*([A,
.el .if !"\\*([E"" \{\
. ie \\n([E-1 \\*([E, eds.,
. el \\*([E, ed.,\}
.if !"\\*([T"" \\fI\\*([T\\fP,
.rm a[
.if !"\\*([I"" .ds a[ \\*([I
.if !"\\*([C"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([C\}
.if !"\\*([D"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([D\}
\\*(a[.
.if !"\\*([G"" Gov. ordering no. \\*([G.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 3[ -- layout for an article in a book.
.\" Prints [A and [T with trailing commas, "in" plus [B in italics, then
.\" each of the following only when set and each preceded by ", ":
.\" "vol. [V", the string [E marked "(editors)" when number register [E
.\" exceeds 1 and "(editor)" otherwise, [I, [C, [D and [P.  A period
.\" closes the entry text and [O follows with its own period.
.\" Every piece from [B through [P ends in \c so that the next ", " or the
.\" final "." attaches without an intervening space.
.\" The .ie line tests the editor count first and only then emits the
.\" ", " text; with the comma in front the condition would be malformed.
.\" s[ opens the entry and e[ closes it.
.de 3[ \" article in book
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
in \\fI\\*([B\\fP\c
.if !"\\*([V"" , vol. \\*([V\c
.if !~\\*([E~~ \{\
. ie \\n([E-1 , \\*([E (editors)\c
. el , \\*([E (editor)\c\}
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 4[ -- layout for a technical report.
.\" Prints [A, then [E marked "editors." when number register [E exceeds 1
.\" and "editor." otherwise, then [T and [R unconditionally, then the
.\" optional pieces " ([G)", ", [I", ", [C" and ", [D", a period, and
.\" finally [O with its own period.
.\" The editor test uses ~ as the string-comparison delimiter.
.\" s[ opens the entry and e[ closes it.
.de 4[ \" report
.s[
.if !"\\*([A"" \\*([A,
.if !~\\*([E~~ \{\
. ie \\n([E-1 \\*([E, editors.
. el \\*([E, editor.\}
\\*([T,
\\*([R\c
.if !"\\*([G"" \& (\\*([G)\c
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 5[ -- layout for a conference paper (a [J entry without [V, see ][).
.\" Prints [A and [T with trailing commas, [J in italics, then the optional
.\" pieces [C, [D and ", [P", a period, and finally [O with its own period.
.\" s[ opens the entry and e[ closes it.
.de 5[ \" conference paper
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J\\fP,
.if !"\\*([C"" \\*([C,
.if !"\\*([D"" \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" [- -- remove all per-reference field strings ([A ... [W) so that no
.\" value leaks into the next reference.  Invoked from e[.
.de [- \" clean up after yourself
.rm [A [B [C [D
.rm [E [F [G
.rm [I [J [K
.rm [N [O [P
.rm [R [T
.rm [V [W
..
.\" @(#)bmac.std 2.2 8/24/83;
.\" standard format troff commands
.\" citation formatting strings
.\" Bracket pair put around a citation, the separator between several
.\" citations (comma plus narrow space) and the range hyphen.
.ds [[ [
.ds ]] ]
.ds ], ,\|
.ds ]- -
.\" Punctuation pairs: the opening string is a blank followed by \&, the
.\" closing string holds the punctuation character itself.
.ds [. " \&
.ds .] .
.ds [, " \&
.ds ,] ,
.\" [< is a blank plus \&; >] is defined as the empty string.
.ds [< " \&
.ds >]
.\" reference formatting strings
.ds c] , \&
.\" Only the first double quote of a .ds value is stripped, so a doubled
.\" quote would leave a literal " in front of " and ".  The value therefore
.\" starts with \& after the single protecting quote.
.ds n] "\& and \&
.ds m] "\& and \&
.ds a] " \&
.\" reference formatting macros
.\" s[ -- open one reference entry: switch hyphenation off (.nh) and start
.\" an IP paragraph whose tag is the string [F in square brackets and whose
.\" second argument is 5m.
.de s[ \" start reference
.nh
.IP [\\*([F] 5m
..
.\" e[ -- close one reference entry by invoking the cleanup macro [-.
.de e[ \" end reference
.[-
..
.\" [] -- hook run before the collected references are printed: emits the
.\" heading text "References" via .SH and then issues .LP.
.\" NOTE(review): a later .de [] in this file replaces this definition.
.de [] \" start to display collected references
.SH
References
.LP
..
.\" ][ -- pick the layout for the current reference.
.\" Number register t[ is set from which strings are non-empty, tested in
.\" this order: [J (with [V -> 1, without -> 5), [B -> 3, [R -> 4, [I -> 2,
.\" otherwise 0.  The last line then calls the macro whose name is that
.\" digit followed by "[", i.e. one of 0[ ... 5[.
.de ][ \" choose format
.ie !"\\*([J"" \{\
. ie !"\\*([V"" .nr t[ 1 \" journal
. el .nr t[ 5 \" conference paper
.\}
.el .ie !"\\*([B"" .nr t[ 3 \" article in book
.el .ie !"\\*([R"" .nr t[ 4 \" technical report
.el .ie !"\\*([I"" .nr t[ 2 \" book
.el .nr t[ 0 \" other
.\\n(t[[
..
.\" 0[ -- layout for a reference of no specific kind.
.\" Prints [A and [T with trailing commas, then [O and ", [D" joined
.\" through \c, then a period.  Each piece appears only when its string is
.\" non-empty.  s[ opens the entry and e[ closes it.
.de 0[ \" other
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
.if !"\\*([O"" \\*([O\c
.if !"\\*([D"" , \\*([D\c
\&.
.e[
..
.\" 1[ -- layout for a journal article.
.\" Prints [A and [T with trailing commas, [J and [V together in italics
.\" followed by a comma, then the optional pieces [N, "([D),", [P and
.\" ", [I", a period, and finally [O with its own period.
.\" s[ opens the entry and e[ closes it.
.de 1[ \" journal article
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J \\*([V\\fP,
.if !"\\*([N"" \\*([N
.if !"\\*([D"" (\\*([D),
.if !"\\*([P"" \\*([P\c
.if !"\\*([I"" , \\*([I\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 2[ -- layout for a book.
.\" Prints [A, or when [A is empty the string [E marked "eds." if number
.\" register [E exceeds 1 and "ed." otherwise; then [T in italics.
.\" The scratch string a[ is cleared and rebuilt from the non-empty ones
.\" of [I, [C and [D joined by ", ", and printed followed by a period.
.\" [G (labelled "Gov. ordering no.") and [O follow when set.
.\" s[ opens the entry and e[ closes it.
.de 2[ \" book
.s[
.ie !"\\*([A"" \\*([A,
.el .if !"\\*([E"" \{\
. ie \\n([E-1 \\*([E, eds.,
. el \\*([E, ed.,\}
.if !"\\*([T"" \\fI\\*([T\\fP,
.rm a[
.if !"\\*([I"" .ds a[ \\*([I
.if !"\\*([C"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([C\}
.if !"\\*([D"" \{\
. if !"\\*(a["" .as a[ , \\&
. as a[ \\*([D\}
\\*(a[.
.if !"\\*([G"" Gov. ordering no. \\*([G.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 3[ -- layout for an article in a book.
.\" Prints [A and [T with trailing commas, "in" plus [B in italics and a
.\" comma, then the optional pieces "vol. [V,", "[E (ed.),", "[I,", "[C,",
.\" [D and ", [P", a period, and finally [O with its own period.
.\" s[ opens the entry and e[ closes it.
.de 3[ \" article in book
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
in \\fI\\*([B\\fP,
.if !"\\*([V"" vol. \\*([V,
.if !"\\*([E"" \\*([E (ed.),
.if !"\\*([I"" \\*([I,
.if !"\\*([C"" \\*([C,
.if !"\\*([D"" \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 4[ -- layout for a technical report.
.\" Prints [A with a trailing comma when set, then [T and [R
.\" unconditionally, then the optional pieces " ([G)", ", [I", ", [C" and
.\" ", [D" joined through \c, and a period.
.\" [O, when set, follows as a sentence of its own.  It carries no leading
.\" ", " because it comes after the closing period; a comma there would
.\" print as ". , ".
.\" s[ opens the entry and e[ closes it.
.de 4[ \" report
.s[
.if !"\\*([A"" \\*([A,
\\*([T,
\\*([R\c
.if !"\\*([G"" \& (\\*([G)\c
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" 5[ -- layout for a conference paper (a [J entry without [V, see ][).
.\" Prints [A and [T with trailing commas, [J in italics and a comma, then
.\" the optional pieces [C, ", [D" and ", [P" joined through \c, and a
.\" period.
.\" [O, when set, follows as a sentence of its own.  It carries no leading
.\" ", " because it comes after the closing period; a comma there would
.\" print as ". , ".
.\" s[ opens the entry and e[ closes it.
.de 5[ \" conference paper
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*([T,
\\fI\\*([J\\fP,
.if !"\\*([C"" \\*([C\c
.if !"\\*([D"" , \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.\" [- -- remove all per-reference field strings ([A ... [W) so that no
.\" value leaks into the next reference.  Invoked from e[.
.de [- \" clean up after yourself
.rm [A [B [C [D
.rm [E [F [G
.rm [I [J [K
.rm [N [O [P
.rm [R [T
.rm [V [W
..
.\" Settings applied only when formatting with troff (condition "t"):
.\" page geometry in centimetres, 12 point text, "~" as hyphenation
.\" indicator, and strings for the German umlauts and sharp s that the
.\" document text uses as \*A \*O \*U \*a \*o \*u \*s.
.\" Registers LL, )l, hm, $r, $R, pp, sp, tp and fp are set for the macro
.\" package -- presumably -me line length, page length, header margin,
.\" spacing factors and point sizes; confirm against the -me reference.
.if t \{ \
.pl 29.7c \" page length
.po 2.5c \" page offset (left margin)
.ll 16.5c \" line length
.lt 16.5c \" title length
.nr LL 16.5c
.nr )l 29.7c
.nr hm 2c
.nr $r 9 \" factor for vertical spacing
.nr $R \n($r
.sz 12 \" font size
.nr pp 12
.nr sp 12
.nr tp 12
.nr fp 10
.hc ~ \" hyphenation character
. \" Umlauts and sharp s
.ds A \(A:
.ds O \(O:
.ds U \(U:
.ds a \(a:
.ds o \(o:
.ds u \(u:
.ds s \(ss
. \" UMLAUT \*:u, etc.
.ds : \v'-0.6m'\h'(1u-(\\n(.fu%2u))*0.13m+0.06m'\z.\h'0.2m'\z.\h'-((1u-(\\n(.fu%2u))*0.13m+0.26m)'\v'0.6m'
.\}
.\" Settings applied only when formatting with nroff (condition "n"):
.\" no page offset, 78 unit line and title length, "~" as hyphenation
.\" indicator, and plain two-letter transliterations for the umlaut and
.\" sharp-s strings used in the document text.
.if n \{ \
.po 0 \" page offset (left margin)
.ll 78 \" line length
.lt 78 \" title length
.nr $r 4 \" factor for vertical spacing
.nr $R \n($r
.hc ~ \" hyphenation character
. \" Umlauts and sharp s
.ds A Ae
.ds O Oe
.ds U Ue
.ds a ae
.ds o oe
.ds u ue
.ds s sz
.\}
.\" _ -- print argument 1, draw an underrule (\(ul) line from the current
.\" position back to horizontal position 0 of the line, then print
.\" argument 2 unchanged.  The leading \& keeps the output line from
.\" being read as a request when argument 1 starts with a control character.
.de _
\&\\$1\l'|0\(ul'\\$2
..
.\" FT -- switch to font C and reduce the size by 2 via .sz; used at the
.\" start of the program displays in this document.  FR is the inverse.
.de FT \" font for programs
.ft C
.sz -2
..
.\" FR -- switch to font R and increase the size by 2 via .sz, undoing
.\" what FT changed.
.de FR
.ft R
.sz +2
..
.\" [] -- hook run before the collected references are printed: emits an
.\" unnumbered heading "References" with .uh and then issues .lp.
.\" Being defined last, this replaces the earlier [] definitions.
.de [] \" start to display collected references
.uh References
.lp
..
.\" $0 -- add one line to the index collected between .(x and .)x.
.\" With an empty second argument only argument 1 is written; otherwise
.\" argument 2, a period and argument 1.  A tab stop is set at 2c.
.\" NOTE(review): presumably invoked by the -me section macros with the
.\" title as argument 1 and the section number as argument 2 -- confirm.
.de $0 \" collect table of contents
.(x
.ta 2c
.ie '\\$2'' \\$1
.el \\$2. \\$1
.)x
..
.\" np -- numbered paragraph: increment number register $p by one and
.\" start an .ip paragraph tagged with the new value followed by a period.
.de np
.nr $p +1
.ip \\n($p.
..
.\" SH -- heading helper: half a line of space, indent reduced by 3,
.\" argument 1 passed to .r, half a line of space, indent restored by 3.
.de SH
.sp 0.5
.in -3
.r \\$1
.sp 0.5
.in +3
..
.\" PP -- paragraph break reduced to half a line of vertical space.
.de PP
.sp 0.5
..
.\" IP -- forward the first two arguments to the -me request .ip.
.\" Same body as the earlier IP definition in this file.
.de IP
.ip \\$1 \\$2
..
.\" I -- forward the first argument to the -me request .i.
.de I
.i \\$1
..
.\" TH -- defined with an empty body, so any .TH line is a no-op.
.de TH
..
.hc @
.EQ
gsize 12
delim $$
.EN
.b " "
.sp 1c
.ta 9c
.ft R
.sz 12
\l'17.1c'
.nf
Selected Examples of
Scanner Specifications
J. Grosch
\l'17.1c'
.sp 12.5c
\l'17.1c'
.ft H
.nf
GESELLSCHAFT F\*UR MATHEMATIK
UND DATENVERARBEITUNG MBH
FORSCHUNGSSTELLE F\*UR
PROGRAMMSTRUKTUREN
AN DER UNIVERSIT\*AT KARLSRUHE
.r
\l'17.1c'
.bp
.oh ''Scanner Specification'%'
.eh ''Scanner Specification'%'
.ce 99
.sz 20
.b " "
.sp 2
Project
.sp
.b "Compiler Generation"
.sp
.sz 12
\l'15c'
.sp
.sz 16
.b "Selected Examples of Scanner Specifications"
.sp 2
Josef Grosch
.sp 2
.sz 14
Mar. 8, 1988
.sp
.sz 12
\l'15c'
.sp 2
Report No. 7
.sp 2
Copyright \(co 1988 GMD
.sp 2
Gesellschaft f\*ur Mathematik und Datenverarbeitung mbH
Forschungsstelle an der Universit\*at Karlsruhe
Vincenz-Prie\*snitz-Str. 1
D-7500 Karlsruhe
.ce 0
.fi
.bp 2
.sh 1 Introduction
.pp
Among the tokens to be recognized by scanners are a few that require
nontrivial processing: comments, strings, and character constants.
Even identifiers and keywords may cause some trouble if the language defines
upper-case and lower-case letters to have the same meaning.
The problems with these tokens are the following:
.ip -
maintaining the line count during tokens extending over several lines
.ip -
maintaining the column count during tokens containing tab characters
.ip -
computation of the source position of tokens extending over several lines or
of compound tokens which are recognized as a sequence of subtokens
.ip -
nested comments
.ip -
reporting unclosed strings and comments as errors
.ip -
computing the internal representation of strings
.ip -
conversion of escape sequences such as doubled string delimiters or
preceding escape characters
.ip -
normalization of upper-case and lower-case letters
.lp
The following chapters contain solutions to the above problems for the
languages Pascal, Modula, C, and Ada. The solutions are scanner specifications
suitable as input for the scanner generator Rex\*([<\*([[Gro87\*(]]\*(>].
The primary intention of this paper is to serve as a reference manual
containing examples for nontrivial cases.
All specifications use C as target language except the chapter on Modula
which uses Modula.
The Appendix contains a complete scanner specification for Ada with Modula
as target language.
.sh 1 Pascal
.pp
.sh 2 Comments
.lp
Problems to solve:
.ip -
unclosed comments
.ip -
newline characters
.ip -
tab characters
.lp
.(b L
Solution:
.sp 0.5
.FT
EOF {if (yyStartState == Comment) Error ("unclosed comment");}
DEFINE CmtCh = - {*\\\\}\\\\t\\\\n}.
START Comment
RULE
"(*" | "{" :- {yyStart (Comment);}
#Comment# "*)" | "}" :- {yyStart (STD);}
#Comment# "*" | CmtCh + :- {}
.)b
.pp
Comments are processed in a separate start state called
.i Comment.
Everything is skipped in this state except closing comment
brackets which switch back to start state STD.
The single character '*'
which can start a closing comment bracket has to be skipped
separately. Otherwise closing comment brackets would not be
recognized because of the "longest match" rule of Rex.
An unclosed comment is indicated by reaching end of file while in start state
.i Comment.
We presuppose the existence of a procedure
.i Error
to report this condition.
We don't need to care about tab and newline characters other than excluding
them from the set
.i CmtCh
because the two rules needed for this problem are already predefined by
Rex:
.lp
.(b L
.FT
#Comment# \\\\t :- {yyTab;}
#Comment# \\\\n :- {yyEol (0);}
.)b
.sh 2 Identifiers
.lp
Problems to solve:
.ip -
normalization of upper-case and lower-case letters
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "Idents.h"
# include "Positions.h"
typedef struct {
tPosition Position;
tIdent Ident;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokIdentifier ...
void ErrorAttribute (Token, Attribute)
int Token;
tScanAttribute * Attribute;
{
Attribute->Ident = NoIdent;
}
}
LOCAL {char String [256]; int L;}
DEFINE letter = {A-Z a-z}.
digit = {0-9}.
RULE
letter (letter | digit) * : {L = GetLower (String);
Attribute.Ident = MakeIdent (String, L); return TokIdentifier;}
.)b
.pp
Normalization of upper-case and lower-case letters to lower-case is done by the
predefined operation
.i GetLower
of Rex.
.bp
.sh 2 "Character Constants"
.lp
Problems to solve:
.ip -
conversion
.ip -
tab characters
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "Positions.h"
typedef struct {
tPosition Position;
char Char;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokCharConst ...
void ErrorAttribute (Token, Attribute)
int Token;
tScanAttribute * Attribute;
{
Attribute->Char = '\\\\0';
}
}
RULE
\&'''' : {Attribute.Char = '\\\\''; return TokCharConst;}
\&' \\\\t ' : {Attribute.Char = '\\\\t'; yyTab2 (1, 1); return TokCharConst;}
\&' ANY ' : {Attribute.Char = TokenPtr [1]; return TokCharConst;}
.)b
.pp
In this example the order of the rules is significant because the last rule
would also match the characters of the preceding one.
.bp
.sh 2 "Strings"
.lp
Problems to solve:
.ip -
conversion
.ip -
doubled delimiters
.ip -
tab characters
.ip -
unclosed strings (at end of lines)
.ip -
source position
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "StringMem.h"
# include "Positions.h"
typedef struct {
tPosition Position;
tStringRef StringRef;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokString ...
void ErrorAttribute (Token, Attribute) ...
}
LOCAL {char String [256]; int L;}
DEFINE StrCh = - {'\\\\t\\\\n}.
START string
RULE
#STD# ' : {yyStart (string); L = 0;}
#string# StrCh +:- {L += GetWord (& String [L]);}
#string# '' :- {String [L ++] = '\\\\'';}
#string# ' :- {yyStart (STD); String [L] = '\\\\0';
Attribute.StringRef = PutString (String, L);
return TokString;}
#string# \\\\t :- {String [L ++] = '\\\\t'; yyTab;}
#string# \\\\n :- {Error ("unclosed string"); yyEol (0);
yyStart (STD); String [L] = '\\\\0';
Attribute.StringRef = PutString (String, L);
return TokString;}
.)b
.pp
We presuppose the existence of a string memory module
.i StringMem.
The procedure
.i PutString
stores a string in the string memory and returns a reference to it which can
be used as attribute of the token
.i TokString.
.bp
.sh 2 "Keywords"
.lp
Problems to solve:
.ip -
normalization of upper-case and lower-case letters
.lp
.(b L
Solution:
.sp 0.5
.FT
GLOBAL {
# define TokAND ...
...
# define TokWITH ...
void ErrorAttribute (Token, Attribute) ...
}
DEFINE A = {Aa}.
...
Z = {Zz}.
RULE
A N D : {return TokAND ;}
...
W I T H : {return TokWITH ;}
.)b
.pp
The idea of the solution is to define identifiers A to Z to stand for the
corresponding upper-case as well as lower-case letters. Then specifying the
keywords in upper-case and spaced does the job.
.bp
.sh 1 Modula
.pp
.sh 2 Comments
.lp
Problems to solve:
.ip -
nested comments
.ip -
unclosed comments
.ip -
newline characters
.ip -
tab characters
.lp
.(b L
Solution:
.sp 0.5
.FT
GLOBAL {VAR NestingLevel: CARDINAL;}
BEGIN {NestingLevel := 0;}
EOF {IF yyStartState = Comment THEN Error ("unclosed comment"); END;}
DEFINE CmtCh = - {*(\\\\t\\\\n}.
START Comment
RULE
#STD, Comment# "(*" :- {INC (NestingLevel); yyStart (Comment);}
#Comment# "*)" :- {DEC (NestingLevel);
IF NestingLevel = 0 THEN yyStart (STD); END;}
#Comment# "(" | "*" | CmtCh + :- {}
.)b
.pp
We need a variable
.i NestingLevel
to count the nesting depth of comments because it is not possible to specify
nested comments by a regular expression. Comments are processed in a separate
start state called
.i Comment.
Everything is skipped in this state except opening or closing comment
brackets which
trigger a change of the nesting level. The single characters '(' and '*'
which can start opening or closing comment brackets have to be skipped
separately. Otherwise comment brackets within comment would not be
recognized because of the "longest match" rule of Rex.
An unclosed comment is indicated by reaching end of file while in start state
.i Comment.
We presuppose the existence of a procedure
.i Error
to report this condition.
We don't need to care about tab and newline characters other than excluding
them from the set
.i CmtCh
because the two rules needed for this problem are already predefined by
Rex:
.lp
.(b L
.FT
#Comment# \\\\t :- {yyTab;}
#Comment# \\\\n :- {yyEol (0);}
.)b
.bp
.sh 2 Strings
.lp
Problems to solve:
.ip -
conversion
.ip -
tab characters
.ip -
unclosed strings (at end of lines)
.ip -
source position
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
FROM StringMem IMPORT tStringRef;
FROM Positions IMPORT tPosition;
TYPE tScanAttribute = RECORD
Position : tPosition;
StringRef : tStringRef;
END;
PROCEDURE ErrorAttribute (Token: INTEGER; VAR Attribute: tScanAttribute);
}
GLOBAL {
FROM Strings IMPORT tString, AssignEmpty, Concatenate, Append;
FROM StringMem IMPORT PutString;
CONST TokString = ...;
PROCEDURE ErrorAttribute (Token: INTEGER; VAR Attribute: tScanAttribute);
BEGIN Attribute.StringRef := ...; END ErrorAttribute;
}
LOCAL {VAR String, S: tString;}
DEFINE StrCh1 = - {'\\\\t\\\\n}.
StrCh2 = - {"\\\\t\\\\n}.
START Str1, Str2
RULE
#STD# ' : {AssignEmpty (String); yyStart (Str1);}
#Str1# StrCh1+ :- {GetWord (S); Concatenate (String, S);}
#Str1# ' :- {yyStart (STD);
Attribute.StringRef := PutString (String);
RETURN TokString;}
.sp 0.5
#STD# \\\\" : {AssignEmpty (String); yyStart (Str2);}
#Str2# StrCh2+ :- {GetWord (S); Concatenate (String, S);}
#Str2# \\\\" :- {yyStart (STD);
Attribute.StringRef := PutString (String);
RETURN TokString;}
.sp 0.5
#Str1, Str2# \\\\t :- {Append (String, 11C); yyTab;}
#Str1, Str2# \\\\n :- {Error ("unclosed string"); yyEol (0); yyStart (STD);
Attribute.StringRef := PutString (String);
RETURN TokString;}
.)b
.pp
Again two separate start states are used to recognize the two forms of
Modula-2 strings. We presuppose the existence of a string handling module
.i Strings
and a string memory module
.i StringMem.
The procedure
.i PutString
stores a string in the string memory and returns a reference to it which can
be used as attribute of the token
.i TokString.
.bp
.sh 1 C
.pp
.sh 2 Comments
.lp
Problems to solve:
.ip -
unclosed comments
.ip -
newline characters
.ip -
tab characters
.lp
.(b L
Solution:
.sp 0.5
.FT
EOF {if (yyStartState == Comment) Error ("unclosed comment");}
DEFINE CmtCh = - {*\\\\t\\\\n}.
START Comment
RULE
"/*" :- {yyStart (Comment);}
#Comment# "*/" :- {yyStart (STD);}
#Comment# "*" | CmtCh + :- {}
.)b
.pp
Comments are processed in a separate start state called
.i Comment.
Everything is skipped in this state except closing comment
brackets which switch back to start state STD.
The single character '*'
which can start a closing comment bracket has to be skipped
separately. Otherwise closing comment brackets would not be
recognized because of the "longest match" rule of Rex.
An unclosed comment is indicated by reaching end of file while in start state
.i Comment.
We presuppose the existence of a procedure
.i Error
to report this condition.
We don't need to care about tab and newline characters other than excluding
them from the set
.i CmtCh
because the two rules needed for this problem are already predefined by
Rex:
.lp
.(b L
.FT
#Comment# \\\\t :- {yyTab;}
#Comment# \\\\n :- {yyEol (0);}
.)b
.bp
.sh 2 "Character Constants"
.lp
Problems to solve:
.ip -
conversion
.ip -
escape sequences
.ip -
tab characters
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "Positions.h"
typedef struct {
tPosition Position;
char Char;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokChar ...
void ErrorAttribute (Token, Attribute)
int Token;
tScanAttribute * Attribute;
{
Attribute->Char = '\\\\0';
}
}
LOCAL {char String [256];}
RULE
\&' \\\\t ' : {Attribute.Char = '\\\\t'; yyTab2 (1, 1); return TokChar;}
\&' ANY ' : {Attribute.Char = TokenPtr [1]; return TokChar;}
\&' \\\\\\\\ n ' : {Attribute.Char = '\\\\n'; return TokChar;}
\&' \\\\\\\\ t ' : {Attribute.Char = '\\\\t'; return TokChar;}
\&' \\\\\\\\ v ' : {Attribute.Char = '\\\\v'; return TokChar;}
\&' \\\\\\\\ b ' : {Attribute.Char = '\\\\b'; return TokChar;}
\&' \\\\\\\\ r ' : {Attribute.Char = '\\\\r'; return TokChar;}
\&' \\\\\\\\ f ' : {Attribute.Char = '\\\\f'; return TokChar;}
\&' \\\\\\\\ {0-7}[1-3] ' : {(void) GetWord (String);
sscanf (String + 2, "%o", & Attribute.Char);
return TokChar;}
\&' \\\\\\\\ ANY ' : {Attribute.Char = TokenPtr [2]; return TokChar;}
.)b
.pp
In this example the order of the rules is significant because the second rule
would also match the characters of the first one. The same holds for the
group of following rules with respect to the last rule.
.bp
.sh 2 Strings
.lp
Problems to solve:
.ip -
conversion
.ip -
escape sequences
.ip -
tab characters
.ip -
strings ranging over several lines
.ip -
source position
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "StringMem.h"
# include "Positions.h"
typedef struct {
tPosition Position;
tStringRef StringRef;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokString ...
void ErrorAttribute (Token, Attribute) ...
}
LOCAL {char String [256], S [5]; int L;}
DEFINE StrCh = - {"\\\\t\\\\n\\\\\\\\}.
START string
RULE
#STD# \\\\" : {yyStart (string); L = 0;}
#string# StrCh+ :- {L += GetWord (& String [L]);}
#string# \\\\t :- {String [L ++] = '\\\\t'; yyTab;}
#string# \\\\\\\\ n :- {String [L ++] = '\\\\n';}
#string# \\\\\\\\ t :- {String [L ++] = '\\\\t';}
#string# \\\\\\\\ v :- {String [L ++] = '\\\\v';}
#string# \\\\\\\\ b :- {String [L ++] = '\\\\b';}
#string# \\\\\\\\ r :- {String [L ++] = '\\\\r';}
#string# \\\\\\\\ f :- {String [L ++] = '\\\\f';}
#string# \\\\\\\\ {0-7}[1-3] :- {(void) GetWord (S);
sscanf (S + 1, "%o", & String [L ++]);}
#string# \\\\\\\\ ANY :- {(void) GetWord (S); String [L ++] = S [1];}
#string# \\\\\\\\ \\\\n :- {yyEol (0); String [L ++] = '\\\\n';}
#string# \\\\" :- {yyStart (STD); String [L] = '\\\\0';
Attribute.StringRef = PutString (String, L);
return TokString;}
#string# \\\\n :- {Error ("unclosed string"); yyEol (0);
yyStart (STD); String [L] = '\\\\0';
Attribute.StringRef = PutString (String, L);
return TokString;}
.)b
.pp
We presuppose the existence of a string memory module
.i StringMem.
The procedure
.i PutString
stores a string in the string memory and returns a reference to it which can
be used as attribute of the token
.i TokString.
.bp
.sh 1 Ada
.pp
.sh 2 Identifiers
.lp
Problems to solve:
.ip -
normalization of upper-case and lower-case letters
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "Idents.h"
# include "Positions.h"
typedef struct {
tPosition Position;
tIdent Ident;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokIdentifier ...
void ErrorAttribute (Token, Attribute) ...
}
LOCAL {char String [256]; int L;}
DEFINE letter = {A-Z a-z}.
digit = {0-9}.
RULE
letter (_? (letter | digit)+ )* : {L = GetLower (String);
Attribute.Ident = MakeIdent (String, L); return TokIdentifier;}
.)b
.pp
Normalization of upper-case and lower-case letters to lower-case is done by the
predefined operation
.i GetLower
of Rex.
.bp
.sh 2 "Numeric Literals"
.lp
Problems to solve:
.ip -
conversion
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "StringMem.h"
# include "Positions.h"
typedef struct {
tPosition Position;
tStringRef StringRef;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokDecimalLiteral ...
# define TokBasedLiteral ...
void ErrorAttribute (Token, Attribute) ...
}
DEFINE digit = {0-9} .
extended_digit = digit | {A-F a-f} .
integer = digit (_? digit) * .
based_integer = extended_digit (_? extended_digit) * .
exponent = {Ee} {+\\\\-} ? integer .
RULE
integer ("." integer) ? exponent ? :
{Attribute.StringRef = PutString (TokenPtr, TokenLength);
return TokDecimalLiteral;}
integer "#" based_integer ("." based_integer) ? "#" exponent ? :
{Attribute.StringRef = PutString (TokenPtr, TokenLength);
return TokBasedLiteral;}
.)b
.pp
The conversion of numeric literals to numeric values is not really solved in
the above solution. By storing the external representation of numeric
literals in a string memory the values are treated symbolically and true
conversion is delayed to be done by other compiler phases.
.bp
.sh 2 "Character Literals"
.lp
Problems to solve:
.ip -
no problems to solve for character literals
.ip -
distinction between character literals and apostrophes
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "Idents.h"
# include "Positions.h"
typedef struct {
tPosition Position;
char Char;
tIdent Ident;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokIdentifier ...
# define TokCharacterLiteral ...
# define TokApostrophe ...
# define TokLParenthesis ...
# define TokRParenthesis ...
void ErrorAttribute (Token, Attribute) ...
}
LOCAL {char String [256]; int L;}
DEFINE character = {\\\\ -~}.
letter = {A-Z a-z}.
digit = {0-9}.
START QUOTE
RULE
#STD# ' character ' : {Attribute.Char = TokenPtr [1];
return TokCharacterLiteral;}
#QUOTE# ' : {yyStart (STD); return TokApostrophe;}
"(" : {yyStart (STD); return TokLParenthesis;}
")" : {yyStart (QUOTE); return TokRParenthesis;}
letter (_? (letter | digit)+ )*
: {yyStart (QUOTE); L = GetLower (String);
Attribute.Ident = MakeIdent (String, L);
return TokIdentifier;}
.)b
.pp
The tokens
.i "Character Literal"
and
.i Apostrophe
can be distinguished in Ada only by consideration of some context. The
pathological input is for example something like
.lp
.(b
.FT
t'('a','b','c')
.)b
where t is a type_mark used as qualification for an aggregate of character
literals. Care has to be taken that 'a', 'b', and 'c' are recognized as
character literals and not '(', ',', and ','. Studying the Ada grammar one
can see that apostrophes are used following identifiers and closing
parentheses only. There are never character literals in these places.
.pp
This leads to the above solution with an additional start state called
.i QUOTE.
After recognition of an identifier or a closing parenthesis the scanner is
switched to start state
.i QUOTE.
After recognition of all other tokens the scanner is switched back
to start state
.i STD.
Apostrophes are recognized only in start state
.i QUOTE
and character literals only in start state
.i STD.
All the other tokens are recognized in both start states.
.\" bp
.sh 2 "String Literals"
.lp
Problems to solve:
.ip -
conversion
.ip -
doubled delimiters
.ip -
unclosed strings (at end of lines)
.ip -
source position
.lp
.(b L
Solution:
.sp 0.5
.FT
EXPORT {
# include "StringMem.h"
# include "Positions.h"
typedef struct {
tPosition Position;
tStringRef StringRef;
} tScanAttribute;
extern void ErrorAttribute ();
}
GLOBAL {
# define TokStringLiteral ...
void ErrorAttribute (Token, Attribute) ...
}
LOCAL {char String [256]; int L;}
DEFINE StrCh = {\\\\ !#-~}.
START string
RULE
#STD# \\\\" : {yyStart (string); L = 0;}
#string# StrCh+ :- {L += GetWord (& String [L]);}
#string# \\\\"\\\\" :- {String [L ++] = '\\\\"';}
#string# \\\\" :- {yyStart (STD); String [L] = '\\\\0';
Attribute.StringRef = PutString (String, L);
return TokStringLiteral;}
#string# \\\\n :- {Error ("unclosed string"); yyEol (0);
yyStart (STD); String [L] = '\\\\0';
Attribute.StringRef = PutString (String, L);
return TokStringLiteral;}
.)b
.pp
We presuppose the existence of a string memory module
.i StringMem.
The procedure
.i PutString
stores a string in the string memory and returns a reference to it which can
be used as attribute of the token
.i TokStringLiteral.
.bp
.sh 2 "Keywords"
.lp
Problems to solve:
.ip -
normalization of upper-case and lower-case letters
.lp
.(b L
Solution:
.sp 0.5
.FT
GLOBAL {
# define TokABORT ...
...
# define TokXOR ...
void ErrorAttribute (Token, Attribute) ...
}
DEFINE A = {Aa}.
...
Z = {Zz}.
RULE
A B O R T : {return TokABORT ;}
...
X O R : {return TokXOR ;}
.)b
.pp
The idea of the solution is to define identifiers A to Z to stand for the
corresponding upper-case as well as lower-case letters. Then specifying the
keywords in upper-case and spaced does the job.
.bp
.uh "Appendix: Complete Scanner Specification for Ada"
.sp
.lp
.nf
.FT
.sz 10
GLOBAL {
FROM Strings IMPORT tString, AssignEmpty, Concatenate, Append, Char;
FROM StringMem IMPORT tStringRef, PutString;
FROM Idents IMPORT tIdent, MakeIdent;
PROCEDURE ErrorAttribute (Token: INTEGER; VAR Attribute: tScanAttribute);
BEGIN END ErrorAttribute;
CONST
TokIdentifier = 1 ;
TokDecimalLiteral = 2 ;
TokBasedLiteral = 3 ;
TokCharLiteral = 4 ;
TokStringLiteral = 5 ;
TokArrow = 6 ; (* '=>' *)
TokDoubleDot = 7 ; (* '..' *)
TokDoubleStar = 8 ; (* '**' *)
TokBecomes = 9 ; (* ':=' *)
TokNotEqual = 10 ; (* '/=' *)
TokGreaterEqual = 11 ; (* '>=' *)
TokLessEqual = 12 ; (* '<=' *)
TokLLabelBracket = 13 ; (* '<<' *)
TokRLabelBracket = 14 ; (* '>>' *)
TokBox = 15 ; (* '<>' *)
TokAmpersand = 16 ; (* '&' *)
TokApostrophe = 17 ; (* ''' *)
TokLParenthesis = 18 ; (* '(' *)
TokRParenthesis = 19 ; (* ')' *)
TokStar = 20 ; (* '*' *)
TokPlus = 21 ; (* '+' *)
TokComma = 22 ; (* ',' *)
TokMinus = 23 ; (* '-' *)
TokDot = 24 ; (* '.' *)
TokDivide = 25 ; (* '/' *)
TokColon = 26 ; (* ':' *)
TokSemicolon = 27 ; (* ';' *)
TokLess = 28 ; (* '<' *)
TokEqual = 29 ; (* '=' *)
TokGreater = 30 ; (* '>' *)
TokBar = 31 ; (* '|' *)
TokABORT = 32 ; (* ABORT *)
TokABS = 33 ; (* ABS *)
TokACCEPT = 34 ; (* ACCEPT *)
TokACCESS = 35 ; (* ACCESS *)
TokALL = 36 ; (* ALL *)
TokAND = 37 ; (* AND *)
TokARRAY = 38 ; (* ARRAY *)
TokAT = 39 ; (* AT *)
TokBEGIN = 40 ; (* BEGIN *)
TokBODY = 41 ; (* BODY *)
TokCASE = 42 ; (* CASE *)
TokCONSTANT = 43 ; (* CONSTANT *)
TokDECLARE = 44 ; (* DECLARE *)
TokDELAY = 45 ; (* DELAY *)
TokDELTA = 46 ; (* DELTA *)
TokDIGITS = 47 ; (* DIGITS *)
TokDO = 48 ; (* DO *)
TokELSE = 49 ; (* ELSE *)
TokELSIF = 50 ; (* ELSIF *)
TokEND = 51 ; (* END *)
TokENTRY = 52 ; (* ENTRY *)
TokEXCEPTION = 53 ; (* EXCEPTION *)
TokEXIT = 54 ; (* EXIT *)
TokFOR = 55 ; (* FOR *)
TokFUNCTION = 56 ; (* FUNCTION *)
TokGENERIC = 57 ; (* GENERIC *)
TokGOTO = 58 ; (* GOTO *)
TokIF = 59 ; (* IF *)
TokIN = 60 ; (* IN *)
TokIS = 61 ; (* IS *)
TokLIMITED = 62 ; (* LIMITED *)
TokLOOP = 63 ; (* LOOP *)
TokMOD = 64 ; (* MOD *)
TokNEW = 65 ; (* NEW *)
TokNOT = 66 ; (* NOT *)
TokNULL = 67 ; (* NULL *)
TokOF = 68 ; (* OF *)
TokOR = 69 ; (* OR *)
TokOTHERS = 70 ; (* OTHERS *)
TokOUT = 71 ; (* OUT *)
TokPACKAGE = 72 ; (* PACKAGE *)
TokPRAGMA = 73 ; (* PRAGMA *)
TokPRIVATE = 74 ; (* PRIVATE *)
TokPROCEDURE = 75 ; (* PROCEDURE *)
TokRAISE = 76 ; (* RAISE *)
TokRANGE = 77 ; (* RANGE *)
TokRECORD = 78 ; (* RECORD *)
TokREM = 79 ; (* REM *)
TokRENAMES = 80 ; (* RENAMES *)
TokRETURN = 81 ; (* RETURN *)
TokREVERSE = 82 ; (* REVERSE *)
TokSELECT = 83 ; (* SELECT *)
TokSEPARATE = 84 ; (* SEPARATE *)
TokSUBTYPE = 85 ; (* SUBTYPE *)
TokTASK = 86 ; (* TASK *)
TokTERMINATE = 87 ; (* TERMINATE *)
TokTHEN = 88 ; (* THEN *)
TokTYPE = 89 ; (* TYPE *)
TokUSE = 90 ; (* USE *)
TokWHEN = 91 ; (* WHEN *)
TokWHILE = 92 ; (* WHILE *)
TokWITH = 93 ; (* WITH *)
TokXOR = 94 ; (* XOR *)
}
LOCAL {
VAR
String, S : tString ;
Word : tString ;
ident : tIdent ;
string : tStringRef ;
ch : CHAR ;
}
DEFINE
digit = {0-9} .
extended_digit = digit | {A-F a-f} .
letter = {a-z A-Z} .
character = {\\ -~} .
stringch = {\\ !#-~} .
integer = digit (_? digit) * .
based_integer = extended_digit (_? extended_digit) * .
illegal = - {\\ \\t\\n} .
A = {Aa} .
B = {Bb} .
C = {Cc} .
D = {Dd} .
E = {Ee} .
F = {Ff} .
G = {Gg} .
H = {Hh} .
I = {Ii} .
J = {Jj} .
K = {Kk} .
L = {Ll} .
M = {Mm} .
N = {Nn} .
O = {Oo} .
P = {Pp} .
Q = {Qq} .
R = {Rr} .
S = {Ss} .
T = {Tt} .
U = {Uu} .
V = {Vv} .
W = {Ww} .
X = {Xx} .
Y = {Yy} .
Z = {Zz} .
START STRING, QUOTE
RULE
NOT #STRING# integer ("." integer) ? (E {+\\-} ? integer) ?
: {yyStart (STD); GetWord (Word);
string := PutString (Word);
RETURN TokDecimalLiteral;}
NOT #STRING#
integer "#" based_integer ("." based_integer) ? "#" (E {+\\-} ? integer) ?
: {yyStart (STD); GetWord (Word);
string := PutString (Word);
RETURN TokBasedLiteral;}
#STD# ' character ': {GetWord (String); ch := Char (String, 2);
RETURN TokCharLiteral;}
NOT #STRING# \\" : {yyStart (STRING); AssignEmpty (String);}
#STRING# stringch + :- {GetWord (S); Concatenate (String, S);}
#STRING# \\"\\" :- {Append (String, '"');}
#STRING# \\" :- {yyStart (STD); string := PutString (String);
RETURN TokStringLiteral;}
#STRING# \\t :- {Append (String, 11C); yyTab;}
#STRING# \\n :- {(* Error ("unclosed string"); *) yyEol (0);
yyStart (STD); string := PutString (String);
RETURN TokStringLiteral;}
NOT #STRING# "--" ANY * : {}
NOT #STRING# "=>" : {yyStart (STD); RETURN TokArrow ;}
NOT #STRING# ".." : {yyStart (STD); RETURN TokDoubleDot ;}
NOT #STRING# "**" : {yyStart (STD); RETURN TokDoubleStar ;}
NOT #STRING# ":=" : {yyStart (STD); RETURN TokBecomes ;}
NOT #STRING# "/=" : {yyStart (STD); RETURN TokNotEqual ;}
NOT #STRING# ">=" : {yyStart (STD); RETURN TokGreaterEqual ;}
NOT #STRING# "<=" : {yyStart (STD); RETURN TokLessEqual ;}
NOT #STRING# "<<" : {yyStart (STD); RETURN TokLLabelBracket ;}
NOT #STRING# ">>" : {yyStart (STD); RETURN TokRLabelBracket ;}
NOT #STRING# "<>" : {yyStart (STD); RETURN TokBox ;}
NOT #STRING# "&" : {yyStart (STD); RETURN TokAmpersand ;}
#QUOTE# "'" : {yyStart (STD); RETURN TokApostrophe ;}
NOT #STRING# "(" : {yyStart (STD); RETURN TokLParenthesis ;}
NOT #STRING# ")" : {yyStart (QUOTE); RETURN TokRParenthesis ;}
NOT #STRING# "*" : {yyStart (STD); RETURN TokStar ;}
NOT #STRING# "+" : {yyStart (STD); RETURN TokPlus ;}
NOT #STRING# "," : {yyStart (STD); RETURN TokComma ;}
NOT #STRING# "-" : {yyStart (STD); RETURN TokMinus ;}
NOT #STRING# "." : {yyStart (STD); RETURN TokDot ;}
NOT #STRING# "/" : {yyStart (STD); RETURN TokDivide ;}
NOT #STRING# ":" : {yyStart (STD); RETURN TokColon ;}
NOT #STRING# ";" : {yyStart (STD); RETURN TokSemicolon ;}
NOT #STRING# "<" : {yyStart (STD); RETURN TokLess ;}
NOT #STRING# "=" : {yyStart (STD); RETURN TokEqual ;}
NOT #STRING# ">" : {yyStart (STD); RETURN TokGreater ;}
NOT #STRING# "|" : {yyStart (STD); RETURN TokBar ;}
NOT #STRING# A B O R T : {yyStart (STD); RETURN TokABORT ;}
NOT #STRING# A B S : {yyStart (STD); RETURN TokABS ;}
NOT #STRING# A C C E P T : {yyStart (STD); RETURN TokACCEPT ;}
NOT #STRING# A C C E S S : {yyStart (STD); RETURN TokACCESS ;}
NOT #STRING# A L L : {(* X.all may prefix an attribute: the next apostrophe is a tick *)
yyStart (QUOTE); RETURN TokALL ;}
NOT #STRING# A N D : {yyStart (STD); RETURN TokAND ;}
NOT #STRING# A R R A Y : {yyStart (STD); RETURN TokARRAY ;}
NOT #STRING# A T : {yyStart (STD); RETURN TokAT ;}
NOT #STRING# B E G I N : {yyStart (STD); RETURN TokBEGIN ;}
NOT #STRING# B O D Y : {yyStart (STD); RETURN TokBODY ;}
NOT #STRING# C A S E : {yyStart (STD); RETURN TokCASE ;}
NOT #STRING# C O N S T A N T : {yyStart (STD); RETURN TokCONSTANT ;}
NOT #STRING# D E C L A R E : {yyStart (STD); RETURN TokDECLARE ;}
NOT #STRING# D E L A Y : {yyStart (STD); RETURN TokDELAY ;}
NOT #STRING# D E L T A : {yyStart (STD); RETURN TokDELTA ;}
NOT #STRING# D I G I T S : {yyStart (STD); RETURN TokDIGITS ;}
NOT #STRING# D O : {yyStart (STD); RETURN TokDO ;}
NOT #STRING# E L S E : {yyStart (STD); RETURN TokELSE ;}
NOT #STRING# E L S I F : {yyStart (STD); RETURN TokELSIF ;}
NOT #STRING# E N D : {yyStart (STD); RETURN TokEND ;}
NOT #STRING# E N T R Y : {yyStart (STD); RETURN TokENTRY ;}
NOT #STRING# E X C E P T I O N : {yyStart (STD); RETURN TokEXCEPTION ;}
NOT #STRING# E X I T : {yyStart (STD); RETURN TokEXIT ;}
NOT #STRING# F O R : {yyStart (STD); RETURN TokFOR ;}
NOT #STRING# F U N C T I O N : {yyStart (STD); RETURN TokFUNCTION ;}
NOT #STRING# G E N E R I C : {yyStart (STD); RETURN TokGENERIC ;}
NOT #STRING# G O T O : {yyStart (STD); RETURN TokGOTO ;}
NOT #STRING# I F : {yyStart (STD); RETURN TokIF ;}
NOT #STRING# I N : {yyStart (STD); RETURN TokIN ;}
NOT #STRING# I S : {yyStart (STD); RETURN TokIS ;}
NOT #STRING# L I M I T E D : {yyStart (STD); RETURN TokLIMITED ;}
NOT #STRING# L O O P : {yyStart (STD); RETURN TokLOOP ;}
NOT #STRING# M O D : {yyStart (STD); RETURN TokMOD ;}
NOT #STRING# N E W : {yyStart (STD); RETURN TokNEW ;}
NOT #STRING# N O T : {yyStart (STD); RETURN TokNOT ;}
NOT #STRING# N U L L : {yyStart (STD); RETURN TokNULL ;}
NOT #STRING# O F : {yyStart (STD); RETURN TokOF ;}
NOT #STRING# O R : {yyStart (STD); RETURN TokOR ;}
NOT #STRING# O T H E R S : {yyStart (STD); RETURN TokOTHERS ;}
NOT #STRING# O U T : {yyStart (STD); RETURN TokOUT ;}
NOT #STRING# P A C K A G E : {yyStart (STD); RETURN TokPACKAGE ;}
NOT #STRING# P R A G M A : {yyStart (STD); RETURN TokPRAGMA ;}
NOT #STRING# P R I V A T E : {yyStart (STD); RETURN TokPRIVATE ;}
NOT #STRING# P R O C E D U R E : {yyStart (STD); RETURN TokPROCEDURE ;}
NOT #STRING# R A I S E : {yyStart (STD); RETURN TokRAISE ;}
NOT #STRING# R A N G E : {yyStart (STD); RETURN TokRANGE ;}
NOT #STRING# R E C O R D : {yyStart (STD); RETURN TokRECORD ;}
NOT #STRING# R E M : {yyStart (STD); RETURN TokREM ;}
NOT #STRING# R E N A M E S : {yyStart (STD); RETURN TokRENAMES ;}
NOT #STRING# R E T U R N : {yyStart (STD); RETURN TokRETURN ;}
NOT #STRING# R E V E R S E : {yyStart (STD); RETURN TokREVERSE ;}
NOT #STRING# S E L E C T : {yyStart (STD); RETURN TokSELECT ;}
NOT #STRING# S E P A R A T E : {yyStart (STD); RETURN TokSEPARATE ;}
NOT #STRING# S U B T Y P E : {yyStart (STD); RETURN TokSUBTYPE ;}
NOT #STRING# T A S K : {yyStart (STD); RETURN TokTASK ;}
NOT #STRING# T E R M I N A T E : {yyStart (STD); RETURN TokTERMINATE ;}
NOT #STRING# T H E N : {yyStart (STD); RETURN TokTHEN ;}
NOT #STRING# T Y P E : {yyStart (STD); RETURN TokTYPE ;}
NOT #STRING# U S E : {yyStart (STD); RETURN TokUSE ;}
NOT #STRING# W H E N : {yyStart (STD); RETURN TokWHEN ;}
NOT #STRING# W H I L E : {yyStart (STD); RETURN TokWHILE ;}
NOT #STRING# W I T H : {yyStart (STD); RETURN TokWITH ;}
NOT #STRING# X O R : {yyStart (STD); RETURN TokXOR ;}
NOT #STRING# letter (_? (letter | digit)+ )*
: {yyStart (QUOTE); GetLower (Word);
ident := MakeIdent (Word);
RETURN TokIdentifier;}
NOT #STRING# illegal : {IO.WriteS (IO.StdOutput, "illegal character: ");
yyEcho; IO.WriteNl (IO.StdOutput);}
.fi
.sz 12
.[]
.[-
.ds [F Gro87
.ds [A J\*(p] Grosch
.ds [T Rex - A Scanner Generator
.ds [I GMD Forschungsstelle an der Universit\\*:at Karlsruhe
.ds [R Compiler Generation Report No. 5
.ds [N 5
.ds [D Dec. 1987
.][
.bp 1
.lp
.b Contents
.sp
.xp